*********************************
*Table 1: Descriptive statistics*
*********************************

*Build the czone-level analysis file: start from the student long differences
*and attach, in this order, the college-educated, student-migrant and
*education-replaceability files. Each merge uses assert(3), so the script
*stops if any czone fails to match.
use "$final_data_outcomes/czone_StudentsLongDifferences.dta", clear

foreach outcomeFile in czone_CollegeEducatedLongDifferences czone_StudentMigrantsLongDifferences czone_EducationReplaceability {
	merge 1:1 czone using "$final_data_outcomes/`outcomeFile'.dta", assert(3) nogenerate
}

********************************************
*LLM outcomes and education from Census/ACS*
********************************************

*Levels: mean, sd, min, max.
*summarize is run variable by variable because each group of variables uses
*its own weight; the groups below pair a variable pattern with its weight.
local lvlVars1 share_sc* share_b* share_a* share_l*
local lvlWgt1  ipums_pop_1990
local lvlVars2 pshare_sc*
local lvlWgt2  ipums_pop_1980
local lvlVars3 share_Mig*
local lvlWgt3  ipums_popMig_1990
local lvlVars4 share_rep*
local lvlWgt4  ipums_emp_1990

forvalues g = 1/4 {
	foreach lvl of varlist `lvlVars`g'' {
		quietly summarize `lvl' [w=`lvlWgt`g'']
		*Store each statistic as a constant column named <stat>_<variable>
		foreach stat in mean sd min max {
			generate `stat'_`lvl' = `r(`stat')'
		}
	}
}


*Changes: only the weighted mean is stored, as a constant column named
*<prefix>_<variable>_<year>. Each group pairs a variable pattern with its
*weight, output prefix and year suffix.
local chgVars1   d_sc* d_b d_a d_l
local chgWgt1    ipums_pop_1990
local chgPrefix1 d_share
local chgYear1   1990

local chgVars2   pd_sc*
local chgWgt2    ipums_pop_1980
local chgPrefix2 d_pshare
local chgYear2   1980

*NOTE(review): the migrant changes are weighted by ipums_pop_1990, whereas the
*migrant levels (share_Mig*) are weighted by ipums_popMig_1990 - confirm this is intended.
local chgVars3   d_Mig*
local chgWgt3    ipums_pop_1990
local chgPrefix3 d_share
local chgYear3   1990

local chgVars4   d_rep*
local chgWgt4    ipums_emp_1990
local chgPrefix4 d_share
local chgYear4   1990

forvalues g = 1/4 {
	foreach chg of varlist `chgVars`g'' {
		quietly summarize `chg' [w=`chgWgt`g'']
		generate `chgPrefix`g''_`chg'_`chgYear`g'' = `r(mean)'
	}
}

*Collapse the data to a single row of summary columns and reshape it to one row per variable.
*Only the level statistics and the mean-change columns are kept; these were generated as constants, so the first row carries all the information.
keep mean_* sd_* min_* max_* d_share_d* d_pshare_pd*
keep if _n==1
*Strip the repeated "d_"/"pd_" (and the _1980 suffix) from the change columns so their names follow the pattern of the level columns
rename (d_share_d_* d_pshare_pd_*_1980) (d_share_* d_pshare_*)
*Constant identifier required by reshape
gen v = 1

*Keep only one type of migrants (migrants_5/4, as in paper by Abigail et al on internal migration in the US)
*The N5 migrant columns are dropped here; every pshare column is dropped as well.
drop *N5* *pshare*

*Whatever follows each stub (mean_, sd_, min_, max_, d_) in a column name becomes the string key "var"
reshape long mean_ sd_ min_ max_ d_, i(v) j(var) string

*Put in right order
*id fixes the row order of Panel A; variables without an id stay missing and sort last
*(they also receive no name below and are removed when the table is built).
gen id = 1 if var=="share_sc_1990"
replace id = 2 if var=="share_sc_a19_24_1990"
replace id = 3 if var=="share_sc_a25_29_1990"
replace id = 4 if var=="share_sc_a30_34_1990"
replace id = 5 if var=="share_sc_a35_49_1990"
replace id = 6 if var=="share_sc_a50_64_1990"
replace id = 7 if var=="share_MigStudN4_a19_64_1990"
replace id = 8 if var=="share_MigStudN4_a19_34_1990"
replace id = 9 if var=="share_MigN4_a19_64_1990"
replace id = 10 if var=="share_MigN4_a19_34_1990"
replace id = 11 if var=="share_b_1990"
replace id = 12 if var=="share_a_1990"
replace id = 13 if var=="share_l_1990"
replace id = 14 if var=="share_rep_1990"
replace id = 15 if var=="share_rep_b_1990"
replace id = 16 if var=="share_rep_a_1990"
replace id = 17 if var=="share_rep_l_1990"
*The two broad student age groups are placed after all other rows
replace id = 18 if var=="share_sc_a19_34_1990"
replace id = 19 if var=="share_sc_a35_64_1990"

sort id

*Names
*Row labels shown in the table (spelling of "migrated" corrected in the two migrant labels)
gen name = "Students" if var=="share_sc_1990"
replace name = "19-24 years" if var=="share_sc_a19_24_1990"
replace name = "25-29 years" if var=="share_sc_a25_29_1990"
replace name = "30-34 years" if var=="share_sc_a30_34_1990"
replace name = "35-49 years" if var=="share_sc_a35_49_1990"
replace name = "50-64 years" if var=="share_sc_a50_64_1990"
replace name = "Students: 19-34 years" if var=="share_sc_a19_34_1990"
replace name = "Students: 35-64 years" if var=="share_sc_a35_64_1990"
replace name = "Students who migrated in previous year" if var=="share_MigStudN4_a19_64_1990"
replace name = "19-34 years" if var=="share_MigStudN4_a19_34_1990"
replace name = "Individuals who migrated in previous year" if var=="share_MigN4_a19_64_1990"
replace name = "19-34 years" if var=="share_MigN4_a19_34_1990"
replace name = "Employed in replaceable job" if var=="share_rep_1990"
replace name = "Bachelor's degree employed in replaceable job" if var=="share_rep_b_1990"
replace name = "No college degree employed in replaceable job" if var=="share_rep_l_1990"
replace name = "Associate's degree employed in replaceable job" if var=="share_rep_a_1990"
replace name = "Bachelor's degree" if var=="share_b_1990"
replace name = "Associate's degree" if var=="share_a_1990"
replace name = "No college degree" if var=="share_l_1990"

*Display
*Convert every statistic to a display string with roughly three significant digits:
*   |x| < 10          two decimals   (0.12, -1.23)
*   10 <= |x| < 100   one decimal    (12.3, -12.3)
*   otherwise         no decimals    (123)
*Missing statistics become empty strings.
*The numbers are formatted with string() instead of being cut with substr(): cutting the
*text to four characters dropped digits (-1.23 was shown as "-1.20", -12.34 as "-12.0")
*and the padding rules turned integers such as 12 or 100 into "1200" / "1000".
foreach q of varlist mean* sd* min* max* d* {
	replace `q' = round(`q',.01)

	tempvar shown
	generate `shown' = ""
	replace `shown' = string(`q', "%9.0f") if !mi(`q')
	*The tier is chosen on the value rounded to the displayed precision, so e.g. 99.96 prints as "100" and not "100.0"
	replace `shown' = string(`q', "%9.1f") if abs(round(`q', .1)) < 100
	replace `shown' = string(`q', "%9.2f") if abs(`q') < 10

	*Swap the numeric column for its formatted text under the same name
	drop `q'
	rename `shown' `q'
}

*Table, Panel A
keep *_ name
keep if !mi(name)
order name mean sd min max d
br

********************************************
* Institutional characteristics from IPEDS *
********************************************

*Load the IPEDS institution descriptives and build 1994 levels and 1994-2007 changes.
use "$final_data_outcomes/IPEDS_institutionsDescriptivesCZ.dta", clear

*Institution counts: round to whole numbers so that the number of institutions is well defined;
*the change is the rounded 2007 count minus the rounded 1994 count.
local instLevels
local instChanges
foreach inst in public private communityCollege university {
	replace `inst'1994 = round(`inst'1994, 1)
	generate d_`inst'_94_07 = round(`inst'2007,1) - `inst'1994
	local instLevels  `instLevels' `inst'1994
	local instChanges `instChanges' d_`inst'_94_07
}

*Remove the "share" prefix from the variable names before the fields are processed
rename (share*) (*)

summarize `instLevels'
summarize `instChanges'

*Fields: multiply by 100 (presumably shares converted to percent - confirm);
*the change is 100 times the 2007 value minus the rescaled 1994 value.
local fieldLevels
local fieldChanges
foreach field in Natural EngCom SocialS BusEco HumArts Manuf HealthS PubMil {
	replace `field'1994 = `field'1994*100
	generate d_`field'_94_07 = `field'2007*100 - `field'1994
	local fieldLevels  `fieldLevels' `field'1994
	local fieldChanges `fieldChanges' d_`field'_94_07
}
summarize `fieldLevels'
summarize `fieldChanges'

keep czone `instLevels' `fieldLevels' d_*_94_07

*1994 levels: unweighted mean, sd, min and max, stored as constant columns <stat>_<variable>
foreach lvl of varlist *1994 {
	quietly summarize `lvl'
	foreach stat in mean sd min max {
		generate `stat'_`lvl' = `r(`stat')'
	}
}

*1994-2007 changes: each change variable is overwritten with its unweighted mean
foreach chg of varlist d_*_94_07 {
	quietly summarize `chg'
	replace `chg' = `r(mean)'
}

*Collapse to one row of (constant) summary columns and reshape to one row per variable.
keep mean_* sd_* min_* max_* d_*
keep if _n==1
gen v = 1

*Give every change column the same suffix as its level columns (d_X_94_07 -> d_X1994).
*reshape then places the mean change on the same row as mean/sd/min/max of X1994, so the
*change no longer has to be copied from the neighbouring row after sorting, a step that
*only worked as long as "X1994" and "X_94_07" happened to sort next to each other.
rename d_*_94_07 d_*1994

reshape long mean_ sd_ min_ max_ d_, i(v) j(var) string

*Safety net: drop any row without level statistics
drop if mi(mean_)

*Row order and labels for Panel C.
*Each entry is "<variable>:<label>"; the position of the entry in the list is the row id.
local pos = 0
foreach entry in ///
	"public1994:Public institutions" ///
	"private1994:Private institutions" ///
	"communityCollege1994:Community colleges" ///
	"university1994:Universities" ///
	"BusEco1994:Business and Economics" ///
	"EngCom1994:Computer Science and Engineering" ///
	"HealthS1994:Health Science" ///
	"HumArts1994:Arts and Humanities" ///
	"Manuf1994:Manufacturing" ///
	"Natural1994:Natural Science" ///
	"PubMil1994:Public and Military" ///
	"SocialS1994:Social Science" {
	local ++pos
	local sep = strpos("`entry'", ":")
	local key = substr("`entry'", 1, `sep' - 1)
	local lab = substr("`entry'", `sep' + 1, .)
	if `pos' == 1 {
		*The first entry creates the two columns, later entries fill them in
		gen id = `pos' if var=="`key'"
		gen name = "`lab'" if var=="`key'"
	}
	else {
		replace id = `pos' if var=="`key'"
		replace name = "`lab'" if var=="`key'"
	}
}

sort id

*Display
*Convert every statistic to a display string with roughly three significant digits:
*   |x| < 10          two decimals   (0.12, -1.23)
*   10 <= |x| < 100   one decimal    (12.3, -12.3)
*   otherwise         no decimals    (123)
*Missing statistics become empty strings.
*The numbers are formatted with string() instead of being cut with substr(): cutting the
*text to four characters dropped digits (-1.23 was shown as "-1.20", 123.46 as "123.")
*and truncated instead of rounding (12.38 was shown as "12.3").
foreach q of varlist mean* sd* min* max* d* {
	replace `q' = round(`q',.01)

	tempvar shown
	generate `shown' = ""
	replace `shown' = string(`q', "%9.0f") if !mi(`q')
	*The tier is chosen on the value rounded to the displayed precision, so e.g. 99.96 prints as "100" and not "100.0"
	replace `shown' = string(`q', "%9.1f") if abs(round(`q', .1)) < 100
	replace `shown' = string(`q', "%9.2f") if abs(`q') < 10

	*Swap the numeric column for its formatted text under the same name
	drop `q'
	rename `shown' `q'
}

*Table, Panel C
keep *_ name
keep if !mi(name)
order name mean sd min max d
br

************ ENROLLMENT
use "$clean_data_education/fallEnrollmentTotalsReshaped.dta", clear
* Locations: keep(3) retains only enrollment rows that have a location record
* (some early observations have no location information and are dropped here)
merge 1:m unitid yearGroup using "$clean_data_education/locations.dta", keep(3) nogenerate
* Institution controls: keep(1 3) also retains institutions without a controls record
merge m:1 unitid using "$clean_data_education/institutionControls.dta", keep(1 3) keepusing(forProfit public iclevel freshmenFullTimeAvg studentsAvg sector) nogenerate

	* Sample restrictions
	* - drop institutions flagged nonMainland and year-1990 rows without a czone
	*   NOTE(review): the previous comment on this line referred to 1991, the condition tests year == 1990 - confirm
	drop if nonMainland == 1 | (year == 1990 & mi(czone))
	* - keep the 1994 and 2007 year groups; the 1994 group is relabelled 1990
	keep if inlist(yearGroup, 1994, 2007)
		replace yearGroup = 1990 if yearGroup == 1994
	* - drop for-profit institutions, institutions with freshmenFullTimeAvg below 50, and those with periods below 3
	drop if forProfit == 1 | freshmenFullTimeAvg < 50 | periods < 3

* total students IPEDS: row sum of total3 and total12 (computed with the missing option)
egen students = rsum(total3 total12), missing
drop total1 total2 total3 total4 total12 total21 total22 total23 total24 total41 total42 total43 total44

* Students by control: the institution's students if it belongs to the group, 0 otherwise
gen totPublic  = cond(public == 1, students, 0)
gen totPrivate = cond(public == 0, students, 0)

* Students by sector group: 4Y = sectors 1-3, 2Y = sectors 4-6, L2Y = sectors 7-9
* (a missing sector falls in no group and contributes 0 everywhere)
gen tot4Y  = cond(inrange(sector, 1, 3), students, 0)
gen tot2Y  = cond(inrange(sector, 4, 6), students, 0)
gen totL2Y = cond(inrange(sector, 7, 9), students, 0)

*** weighting: scale total students and every group total (tot*) by ourRatio
unab groupTotals : tot*
foreach enrol in students `groupTotals' {
	replace `enrol' = `enrol' * ourRatio
}

*** collapse at czone-year level: sums of the weighted counts, last observed value of the identifiers
collapse (last) statefip region division name_czone (sum) students `groupTotals', by(czone yearGroup)

*** share of students IPEDS: each group total relative to all students in the czone-year
foreach grp of local groupTotals {
	generate share_`grp' = `grp'/students
}

*** reshape: only the shares are kept, one column per share and year group
drop students `groupTotals'
reshape wide share_* , i(czone) j(yearGroup)

*** merge totals from Census (only czones present in all files are kept)
foreach censusFile in czone1990_schooling czone2008_schooling {
	merge 1:1 czone using "$clean_data_lmarket/`censusFile'.dta", keep(3) nogenerate
}

* Rescale each IPEDS share by ipums_pop_sc / ipums_pop of the matching census year, times 100.
* The 1990 shares use the 1990 census variables, the 2007 shares use the 2008 ones.
local ipedsYears  1990 2007
local censusYears 1990 2008
forvalues k = 1/2 {
	local ipedsYr  : word `k' of `ipedsYears'
	local censusYr : word `k' of `censusYears'
	foreach shr of varlist share*`ipedsYr' {
		generate `shr'_cen = `shr'*ipums_pop_sc_`censusYr'/ipums_pop_`censusYr'*100
	}
}

* Long differences (2007 minus 1990)
foreach grp in totPublic totPrivate tot4Y {
	generate d_share_`grp'1990_cen = share_`grp'2007_cen - share_`grp'1990_cen
}
* Community colleges = 2Y plus L2Y institutions
generate d_share_community1990_cen = (share_tot2Y2007_cen + share_totL2Y2007_cen) - (share_tot2Y1990_cen + share_totL2Y1990_cen)

generate share_community1990_cen = share_tot2Y1990_cen + share_totL2Y1990_cen

*** keep useful variables
keep czone ipums_pop_1990 d_share_* share_totPublic1990_cen share_totPrivate1990_cen share_tot4Y1990_cen share_community1990_cen


*** Summary (all statistics weighted by ipums_pop_1990)

* Levels: mean, sd, min and max stored as constant columns <stat>_<variable>
foreach lvl of varlist share_* {
	quietly summarize `lvl' [w=ipums_pop_1990]
	foreach stat in mean sd min max {
		generate `stat'_`lvl' = `r(`stat')'
	}
}

* Changes: each change variable is overwritten with its weighted mean
foreach chg of varlist d_share_* {
	quietly summarize `chg' [w=ipums_pop_1990]
	replace `chg' = `r(mean)'
}

*Collapse to one row of (constant) summary columns and reshape to one row per variable
keep mean_* sd_* min_* max_* d_*
keep if _n==1
gen v = 1

reshape long mean_ sd_ min_ max_ d_, i(v) j(var) string

*Row order and labels for Panel B.
*Each entry is "<variable>:<label>"; the position of the entry in the list is the row id.
local pos = 0
foreach entry in ///
	"share_totPublic1990_cen:Public institutions" ///
	"share_totPrivate1990_cen:Private institutions" ///
	"share_community1990_cen:Community colleges" ///
	"share_tot4Y1990_cen:Universities" {
	local ++pos
	local sep = strpos("`entry'", ":")
	local key = substr("`entry'", 1, `sep' - 1)
	local lab = substr("`entry'", `sep' + 1, .)
	if `pos' == 1 {
		*The first entry creates the two columns, later entries fill them in
		gen id = `pos' if var=="`key'"
		gen name = "`lab'" if var=="`key'"
	}
	else {
		replace id = `pos' if var=="`key'"
		replace name = "`lab'" if var=="`key'"
	}
}

sort id

*Display
*Convert every statistic to a display string with roughly three significant digits:
*   |x| < 10          two decimals   (0.12, -1.23)
*   10 <= |x| < 100   one decimal    (12.3, -12.3)
*   otherwise         no decimals    (123)
*Missing statistics become empty strings.
*The numbers are formatted with string() instead of being cut with substr(): cutting the
*text to four characters dropped digits (-1.23 was shown as "-1.20", -12.34 as "-12.0"),
*truncated instead of rounding, and left whole numbers without decimals in this panel
*while the other panels padded them.
foreach q of varlist mean* sd* min* max* d* {
	replace `q' = round(`q',.01)

	tempvar shown
	generate `shown' = ""
	replace `shown' = string(`q', "%9.0f") if !mi(`q')
	*The tier is chosen on the value rounded to the displayed precision, so e.g. 99.96 prints as "100" and not "100.0"
	replace `shown' = string(`q', "%9.1f") if abs(round(`q', .1)) < 100
	replace `shown' = string(`q', "%9.2f") if abs(`q') < 10

	*Swap the numeric column for its formatted text under the same name
	drop `q'
	rename `shown' `q'
}

*Table, Panel B
keep *_ name
keep if !mi(name)
order name mean sd min max d
br



